*** Import P-Index raw data
* Load the raw P-Index file, replacing whatever dataset is currently in memory.
use "/Users/elle/Desktop/3 papers/Concept and Measure/replication materials/P-Index.dta", clear

*** Sample = EU + Iceland + Norway + Switzerland + UK - Cyprus
* Attach a numeric country code to each country name listed below.
* Every list element is a code followed by the country name it belongs to.
* Slovakia is listed under two spellings; both receive code 703.
* Countries whose name is not in the list keep a missing ccode.
generate ccode = .
foreach pair in                 ///
    "40 Austria"                ///
    "56 Belgium"                ///
    "100 Bulgaria"              ///
    "191 Croatia"               ///
    "203 Czech Republic"        ///
    "208 Denmark"               ///
    "233 Estonia"               ///
    "246 Finland"               ///
    "250 France"                ///
    "276 Germany"               ///
    "300 Greece"                ///
    "348 Hungary"               ///
    "352 Iceland"               ///
    "372 Ireland"               ///
    "380 Italy"                 ///
    "428 Latvia"                ///
    "440 Lithuania"             ///
    "442 Luxembourg"            ///
    "470 Malta"                 ///
    "528 Netherlands"           ///
    "578 Norway"                ///
    "616 Poland"                ///
    "620 Portugal"              ///
    "642 Romania"               ///
    "703 Slovakia"              ///
    "703 Slovak Republic"       ///
    "705 Slovenia"              ///
    "724 Spain"                 ///
    "752 Sweden"                ///
    "756 Switzerland"           ///
    "826 United Kingdom" {
    * First token is the numeric code; the remainder is the country name.
    gettoken code name : pair
    local name = trim("`name'")
    replace ccode = `code' if country == "`name'"
}

* Drop Iceland (352), Ireland (372), Luxembourg (442) and Malta (470) for a final sample of 26 countries.
* Cyprus is not included in the dataset to begin with.
* NOTE(review): rows with a missing ccode are not removed here - confirm the raw file holds no other countries.
drop if inlist(ccode, 352, 372, 442, 470)

*** Index Aggregation

** Robbery
* Give the two raw sentence-range strings (first offense, aggravated offense) short working names.
* NOTE(review): the raw variable names suggest the ranges are in months - confirm against the codebook.
rename robberyfirstoffensejailrangeinmo rob_range_first
rename aggravatedoffensejailrangeinmont rob_range_ag

* The same parsing steps are applied to the first-offense and the aggravated range.
foreach kind in first ag {
    local v rob_range_`kind'
    * Output names: rob_range / rob_average for first offense, with _aggrav appended for the aggravated offense.
    local sfx
    if "`kind'" == "ag" local sfx _aggrav

    * Piece 1 is the text before the hyphen (lower bound), piece 2 the text after it (upper bound).
    split `v', parse("-")
    * Piece 11 is the lower bound with anything from a plus sign onwards removed.
    split `v'1, parse("+")
    * Flag entries whose lower bound changed when split on the plus sign.
    generate `v'_plus = 1 if `v'11 != `v'1
    * Flag entries whose upper bound reads life.
    generate `v'_life = 1 if `v'2 == "life"
    * Convert both bounds to numbers; a life upper bound becomes missing at this point.
    destring `v'2, replace ignore("life")
    destring `v'11, replace
    * No upper bound and no plus flag: use the lower bound as the upper bound.
    replace `v'2 = `v'11 if `v'_plus != 1 & `v'2 == .

    * Life is scored as 1.2 times the largest numeric upper bound in the data.
    egen `v'_max = max(`v'2)
    replace `v'2 = `v'_max*1.2 if `v'_life == 1
    * Any upper bound still missing is set to the mean upper bound, computed after the life substitution.
    egen mean_`v'2 = mean(`v'2)
    replace `v'2 = mean_`v'2 if `v'2 == .

    * Width and midpoint of the sentencing range.
    generate rob_range`sfx' = `v'2 - `v'11
    generate rob_average`sfx' = (`v'11 + `v'2)/2
}

* Standardize the four sentence-length indicators with egen std().
* average sentence, first offense
egen rob_avsent1 = std(rob_average)
* sentence range, first offense
egen rob_rangesent1 = std(rob_range)
* average sentence, aggravated offense
egen rob_avsent2 = std(rob_average_aggrav)
* sentence range, aggravated offense
egen rob_rangesent2 = std(rob_range_aggrav)

* Robbery sub-index: sum of the ten indicators, then divided by ten.
* A missing value in any indicator makes the sub-index missing.
generate sum_rob = rob_manmin1 + rob_avsent1 + rob_rangesent1 + rob_option + rob_recid ///
    + rob_aggrav + rob_manmin2 + rob_avsent2 + rob_rangesent2 + rob_weapon
replace sum_rob = sum_rob/10


** Rape
* Give the two raw sentence-range strings (first offense, aggravated offense) short working names.
* NOTE(review): v25 is assumed to be the aggravated-offense range for rape - confirm against the raw file.
rename rapefirstoffensejailrangeinmonth rape_range_first
rename v25 rape_range_ag

* The same parsing steps are applied to the first-offense and the aggravated range.
foreach kind in first ag {
    local v rape_range_`kind'
    * Output names: rape_range / rape_average for first offense, with _aggrav appended for the aggravated offense.
    local sfx
    if "`kind'" == "ag" local sfx _aggrav

    * Piece 1 is the text before the hyphen (lower bound), piece 2 the text after it (upper bound).
    split `v', parse("-")
    * Piece 11 is the lower bound with anything from a plus sign onwards removed.
    split `v'1, parse("+")
    * Flag entries whose lower bound changed when split on the plus sign.
    generate `v'_plus = 1 if `v'11 != `v'1
    * Flag entries whose upper bound reads life.
    generate `v'_life = 1 if `v'2 == "life"
    * Convert both bounds to numbers; a life upper bound becomes missing at this point.
    destring `v'2, replace ignore("life")
    destring `v'11, replace
    * No upper bound and no plus flag: use the lower bound as the upper bound.
    replace `v'2 = `v'11 if `v'_plus != 1 & `v'2 == .

    * Life is scored as 1.2 times the largest numeric upper bound in the data.
    egen `v'_max = max(`v'2)
    replace `v'2 = `v'_max*1.2 if `v'_life == 1
    * Any upper bound still missing is set to the mean upper bound, computed after the life substitution.
    egen mean_`v'2 = mean(`v'2)
    replace `v'2 = mean_`v'2 if `v'2 == .

    * Width and midpoint of the sentencing range.
    generate rape_range`sfx' = `v'2 - `v'11
    generate rape_average`sfx' = (`v'11 + `v'2)/2
}

* Standardize the four sentence-length indicators with egen std().
* average sentence, first offense
egen rape_avsent1 = std(rape_average)
* sentence range, first offense
egen rape_rangesent1 = std(rape_range)
* average sentence, aggravated offense
egen rape_avsent2 = std(rape_average_aggrav)
* sentence range, aggravated offense
egen rape_rangesent2 = std(rape_range_aggrav)

* Rape sub-index: sum of the eleven indicators, then divided by eleven.
* A missing value in any indicator makes the sub-index missing.
generate sum_rape = rape_manmin1 + rape_avsent1 + rape_rangesent1 + rape_option + rape_recid ///
    + rape_aggrav + rape_manmin2 + rape_avsent2 + rape_rangesent2 + rape_weapon + rape_age
replace sum_rape = sum_rape/11


** Manslaughter
* Only a first-offense range is processed for manslaughter.
rename manslaughterfirstoffensejailrang mans_range_first
local v mans_range_first

* Piece 1 is the text before the hyphen (lower bound), piece 2 the text after it (upper bound).
split `v', parse("-")
* Piece 11 is the lower bound with anything from a plus sign onwards removed.
split `v'1, parse("+")
* Flag entries whose lower bound changed when split on the plus sign.
generate `v'_plus = 1 if `v'11 != `v'1
* Flag life sentences: either the upper bound reads life, or the first piece itself reads life.
generate `v'_life = 1 if `v'2 == "life" | `v'1 == "life"
* Convert both bounds to numbers; a life entry becomes missing in either bound.
destring `v'2, replace ignore("life")
destring `v'11, replace ignore("life")
* No upper bound and no plus flag: use the lower bound as the upper bound.
replace `v'2 = `v'11 if `v'_plus != 1 & `v'2 == .

* Life is scored as 1.2 times the largest numeric upper bound in the data.
egen `v'_max = max(`v'2)
replace `v'2 = `v'_max*1.2 if `v'_life == 1
* Any upper bound still missing is set to the mean upper bound, computed after the life substitution.
egen mean_`v'2 = mean(`v'2)
replace `v'2 = mean_`v'2 if `v'2 == .
* A life entry with no numeric lower bound gets a lower bound of zero.
replace `v'11 = 0 if `v'11 == . & `v'_life == 1

* Width and midpoint of the sentencing range.
generate mans_range = `v'2 - `v'11
generate mans_average = (`v'11 + `v'2)/2

* Standardize the two sentence-length indicators with egen std().
* average sentence, first offense
egen mans_avsent1 = std(mans_average)
* sentence range, first offense
egen mans_rangesent1 = std(mans_range)

* Manslaughter sub-index: sum of the five indicators, then divided by five.
generate sum_mans = mans_manmin1 + mans_avsent1 + mans_rangesent1 + mans_option + mans_recid
replace sum_mans = sum_mans/5


** Fraud
* Give the two raw sentence-range strings (basic offense, aggravated offense) short working names.
rename basicminimumoffensejailrangeinmo fraud_range_first
rename aggravoffensejailrangeinmonths fraud_range_ag

* The same parsing steps are applied to the basic and the aggravated range.
foreach kind in first ag {
    local v fraud_range_`kind'
    * Output names: fraud_range / fraud_average for the basic offense, with _aggrav appended for the aggravated offense.
    local sfx
    if "`kind'" == "ag" local sfx _aggrav

    * Piece 1 is the text before the hyphen (lower bound), piece 2 the text after it (upper bound).
    split `v', parse("-")
    * Piece 11 is the lower bound with anything from a plus sign onwards removed.
    split `v'1, parse("+")
    * Flag entries whose lower bound changed when split on the plus sign.
    generate `v'_plus = 1 if `v'11 != `v'1
    * Flag entries whose upper bound reads life.
    generate `v'_life = 1 if `v'2 == "life"
    * Convert both bounds to numbers; a life upper bound becomes missing at this point.
    destring `v'2, replace ignore("life")
    destring `v'11, replace
    * No upper bound and no plus flag: use the lower bound as the upper bound.
    replace `v'2 = `v'11 if `v'_plus != 1 & `v'2 == .

    * Life is scored as 1.2 times the largest numeric upper bound in the data.
    egen `v'_max = max(`v'2)
    replace `v'2 = `v'_max*1.2 if `v'_life == 1
    * Any upper bound still missing is set to the mean upper bound, computed after the life substitution.
    egen mean_`v'2 = mean(`v'2)
    replace `v'2 = mean_`v'2 if `v'2 == .

    * Width and midpoint of the sentencing range.
    generate fraud_range`sfx' = `v'2 - `v'11
    generate fraud_average`sfx' = (`v'11 + `v'2)/2
}

* Standardize the four sentence-length indicators with egen std().
* average sentence, basic offense
egen fraud_avsent1 = std(fraud_average)
* sentence range, basic offense
egen fraud_rangesent1 = std(fraud_range)
* average sentence, aggravated offense
egen fraud_avsent2 = std(fraud_average_aggrav)
* sentence range, aggravated offense
egen fraud_rangesent2 = std(fraud_range_aggrav)

* Fraud sub-index: sum of the nine indicators, then divided by nine.
* A missing value in any indicator makes the sub-index missing.
generate sum_fraud = fraud_manmin1 + fraud_avsent1 + fraud_rangesent1 + fraud_option + fraud_recid ///
    + fraud_aggrav + fraud_manmin2 + fraud_avsent2 + fraud_rangesent2
replace sum_fraud = sum_fraud/9


** Drugs - Possession
* Only a first-offense range is processed for drug possession.
rename firstoffensejailrangeinmonths drugp_range_first
local v drugp_range_first

* Piece 1 is the text before the hyphen (lower bound), piece 2 the text after it (upper bound).
split `v', parse("-")
* Piece 11 is the lower bound with anything from a plus sign onwards removed.
split `v'1, parse("+")
* Flag entries whose lower bound changed when split on the plus sign.
generate `v'_plus = 1 if `v'11 != `v'1
* Flag entries whose upper bound reads life.
generate `v'_life = 1 if `v'2 == "life"
* Convert both bounds to numbers; a life upper bound becomes missing at this point.
destring `v'2, replace ignore("life")
destring `v'11, replace
* No upper bound and no plus flag: use the lower bound as the upper bound.
replace `v'2 = `v'11 if `v'_plus != 1 & `v'2 == .

* Life is scored as 1.2 times the largest numeric upper bound in the data.
egen `v'_max = max(`v'2)
replace `v'2 = `v'_max*1.2 if `v'_life == 1
* Any upper bound still missing is set to the mean upper bound, computed after the life substitution.
egen mean_`v'2 = mean(`v'2)
replace `v'2 = mean_`v'2 if `v'2 == .

* Width and midpoint of the sentencing range.
generate drugp_range = `v'2 - `v'11
generate drugp_average = (`v'11 + `v'2)/2

* Standardize the two sentence-length indicators with egen std().
* average sentence, first offense
egen drugp_avsent1 = std(drugp_average)
* sentence range, first offense
egen drugp_rangesent1 = std(drugp_range)

* Possession sub-index: sum of the six indicators, then divided by six.
generate sum_drugp = drugp_manmin1 + drugp_avsent1 + drugp_rangesent1 + drugp_option + drugp_recid + drugp_marij
replace sum_drugp = sum_drugp/6


** Drugs - Distribution
* Only a first-offense range is processed for drug distribution.
* NOTE(review): v55 is assumed to be the distribution first-offense range - confirm against the raw file.
rename v55 drugd_range_first
local v drugd_range_first

* Piece 1 is the text before the hyphen (lower bound), piece 2 the text after it (upper bound).
split `v', parse("-")
* Piece 11 is the lower bound with anything from a plus sign onwards removed.
split `v'1, parse("+")
* Flag entries whose lower bound changed when split on the plus sign.
generate `v'_plus = 1 if `v'11 != `v'1
* Flag entries whose upper bound reads life.
generate `v'_life = 1 if `v'2 == "life"
* Convert both bounds to numbers; a life upper bound becomes missing at this point.
destring `v'2, replace ignore("life")
destring `v'11, replace
* No upper bound and no plus flag: use the lower bound as the upper bound.
replace `v'2 = `v'11 if `v'_plus != 1 & `v'2 == .

* Life is scored as 1.2 times the largest numeric upper bound in the data.
egen `v'_max = max(`v'2)
replace `v'2 = `v'_max*1.2 if `v'_life == 1
* Any upper bound still missing is set to the mean upper bound, computed after the life substitution.
egen mean_`v'2 = mean(`v'2)
replace `v'2 = mean_`v'2 if `v'2 == .

* Width and midpoint of the sentencing range.
generate drugd_range = `v'2 - `v'11
generate drugd_average = (`v'11 + `v'2)/2

* Standardize the two sentence-length indicators with egen std().
* average sentence, first offense
egen drugd_avsent1 = std(drugd_average)
* sentence range, first offense
egen drugd_rangesent1 = std(drugd_range)

* Distribution sub-index: sum of the six indicators, then divided by six.
* NOTE(review): drupd_option (sic) is spelled this way everywhere in this script, so it presumably
* matches the variable name in the raw dataset - confirm before renaming it to drugd_option.
generate sum_drugd = drugd_manmin1 + drugd_avsent1 + drugd_rangesent1 + drupd_option + drugd_recid + drugd_marij
replace sum_drugd = sum_drugd/6


** Aggregate P-Index
* The overall index is the plain sum of the six crime-specific sub-indices.
* The sum expression is built from the list by joining the names with plus signs.
local parts sum_rob sum_rape sum_mans sum_fraud sum_drugp sum_drugd
local total_expr : subinstr local parts " " "+", all
generate sum_total = `total_expr'


*** Descriptive P-Index Visualization
** Descriptives and quartiles
codebook sum_total
summarize sum_total, detail

** Table 1
* Move the country name and the index columns to the front, then sort ascending by the overall index.
order country `parts' sum_total
sort sum_total

** Figure 2
* Horizontal bars of the overall index by country, ordered by the index, descending.
graph hbar (mean) sum_total, over(country, sort(sum_total) descending)


*** Intention vs. Implementation
** Merge in prison population dataset and newer/older EU membership
* One-to-one match on the country name; the merge indicator is not kept.
merge 1:1 country using "/Users/elle/Desktop/3 papers/Concept and Measure/replication materials/ppop and old.dta", nogenerate
* Rows without a country code (not part of the sample defined earlier) are removed.
drop if ccode == .

* Average prison population
* Build the running sum ppop_2008 + ... + ppop_2013 once; it is reused for both averages.
local total ppop_2008
forvalues yr = 2009/2013 {
    local total `total'+ppop_`yr'
}

* Seven-year average, 2008-2014. It is missing whenever any single year is missing.
generate ppop_average = `total'+ppop_2014
replace ppop_average = ppop_average/7

* Prison population is missing in 2014 for Belgium and Norway, so their average uses 2008-2013 only.
* NOTE(review): any other country with a missing year keeps a missing average - confirm none exist.
generate ppop_average1 = `total'
replace ppop_average1 = ppop_average1/6
* ccode 56 is Belgium, ccode 578 is Norway.
replace ppop_average = ppop_average1 if inlist(ccode, 56, 578)


* Correlations - prison population to P-Index
** Table 2
* Full sample (value noted in the original script: 0.1)
pwcorr sum_total ppop_average

* Split by the old indicator from the merged membership file.
* Values noted in the original script: 0.47 for old==1, -0.64 for old==0.
* NOTE(review): star() expects a significance level such as .05 or .01 - confirm that 99 is intended.
foreach grp in 1 0 {
    pwcorr sum_total ppop_average if old == `grp', star(99)
}

** merge in ISO codes
* One-to-one match on the country name; the merge indicator is not kept.
merge 1:1 country using "/Users/elle/Desktop/3 papers/Concept and Measure/replication materials/iso.dta", nogenerate
* Remove the rows for Iceland, Ireland, Luxembourg and Malta by their ISO code.
drop if inlist(iso, "ISL", "IRL", "LUX", "MLT")

** Figure 4
* For each membership group (old==1 first, then old==0) add a labelled scatter and a linear fit.
local plots
foreach grp in 1 0 {
    local plots `plots' (scatter sum_total ppop_average if old==`grp', mlabel(iso))
    local plots `plots' (lfit sum_total ppop_average if old==`grp')
}
twoway `plots'


*** Capacity - Fragile State Index
** Merge in FSI data
* One-to-one match on the country name; the merge indicator is not kept.
merge 1:1 country using "/Users/elle/Desktop/3 papers/Concept and Measure/replication materials/fsi.dta", nogenerate
* Rows without a country code (not part of the sample) are removed.
drop if ccode == .

** Figure 5
* Horizontal bars of fsi_average by country, ordered by fsi_average, descending.
graph hbar (mean) fsi_average, over(country, sort(fsi_average) descending)
* P-Index against fsi_average, labelled with ISO codes, plus a linear fit.
twoway scatter sum_total fsi_average, mlabel(iso) || lfit sum_total fsi_average



*** Robustness Checks

** 1. Double weight the average sentence indicator
* Each sub-index is rebuilt with its standardized average-sentence term(s) counted twice.
* The divisor is the original number of indicators plus one for every doubled term.
generate sum_rob2 = rob_manmin1 + 2*rob_avsent1 + rob_rangesent1 + rob_option + rob_recid ///
    + rob_aggrav + rob_manmin2 + 2*rob_avsent2 + rob_rangesent2 + rob_weapon
replace sum_rob2 = sum_rob2/12

generate sum_rape2 = rape_manmin1 + 2*rape_avsent1 + rape_rangesent1 + rape_option + rape_recid ///
    + rape_aggrav + rape_manmin2 + 2*rape_avsent2 + rape_rangesent2 + rape_weapon + rape_age
replace sum_rape2 = sum_rape2/13

generate sum_mans2 = mans_manmin1 + 2*mans_avsent1 + mans_rangesent1 + mans_option + mans_recid
replace sum_mans2 = sum_mans2/6

generate sum_fraud2 = fraud_manmin1 + 2*fraud_avsent1 + fraud_rangesent1 + fraud_option + fraud_recid ///
    + fraud_aggrav + fraud_manmin2 + 2*fraud_avsent2 + fraud_rangesent2
replace sum_fraud2 = sum_fraud2/11

generate sum_drugp2 = drugp_manmin1 + 2*drugp_avsent1 + drugp_rangesent1 + drugp_option + drugp_recid + drugp_marij
replace sum_drugp2 = sum_drugp2/7

* NOTE(review): drupd_option (sic) is spelled this way everywhere in this script - presumably the dataset's own name.
generate sum_drugd2 = drugd_manmin1 + 2*drugd_avsent1 + drugd_rangesent1 + drupd_option + drugd_recid + drugd_marij
replace sum_drugd2 = sum_drugd2/7

* Re-weighted overall index.
generate sum_total2 = sum_rob2 + sum_rape2 + sum_mans2 + sum_fraud2 + sum_drugp2 + sum_drugd2

* Agreement with the baseline index (value noted in the original script: 0.98)
correlate sum_total sum_total2

* Correlation with prison population, full sample (value noted in the original script: 0.1)
correlate sum_total2 ppop_average

* By membership group; values noted in the original script: 0.49 for old==1, -0.61 for old==0.
* The same pass collects a labelled scatter and a linear fit per group for the plot below.
local plots
foreach grp in 1 0 {
    correlate sum_total2 ppop_average if old == `grp'
    local plots `plots' (scatter sum_total2 ppop_average if old==`grp', mlabel(iso))
    local plots `plots' (lfit sum_total2 ppop_average if old==`grp')
}
twoway `plots'


** 2. Excluding the range indicator from aggregation
* Each sub-index is rebuilt without its standardized sentence-range term(s).
* The divisor is the number of indicators that remain.
generate sum_rob1 = rob_manmin1 + rob_avsent1 + rob_option + rob_recid ///
    + rob_aggrav + rob_manmin2 + rob_avsent2 + rob_weapon
replace sum_rob1 = sum_rob1/8

generate sum_rape1 = rape_manmin1 + rape_avsent1 + rape_option + rape_recid ///
    + rape_aggrav + rape_manmin2 + rape_avsent2 + rape_weapon + rape_age
replace sum_rape1 = sum_rape1/9

generate sum_mans1 = mans_manmin1 + mans_avsent1 + mans_option + mans_recid
replace sum_mans1 = sum_mans1/4

generate sum_fraud1 = fraud_manmin1 + fraud_avsent1 + fraud_option + fraud_recid ///
    + fraud_aggrav + fraud_manmin2 + fraud_avsent2
replace sum_fraud1 = sum_fraud1/7

generate sum_drugp1 = drugp_manmin1 + drugp_avsent1 + drugp_option + drugp_recid + drugp_marij
replace sum_drugp1 = sum_drugp1/5

* NOTE(review): drupd_option (sic) is spelled this way everywhere in this script - presumably the dataset's own name.
generate sum_drugd1 = drugd_manmin1 + drugd_avsent1 + drupd_option + drugd_recid + drugd_marij
replace sum_drugd1 = sum_drugd1/5

* Overall index without the range indicators.
generate sum_total1 = sum_rob1 + sum_rape1 + sum_mans1 + sum_fraud1 + sum_drugp1 + sum_drugd1

* Agreement with the baseline index (value noted in the original script: 0.83)
correlate sum_total sum_total1

* Correlation with prison population, full sample (value noted in the original script: 0.02)
correlate sum_total1 ppop_average

* By membership group; values noted in the original script: 0.33 for old==1, -0.75 for old==0.
* The same pass collects a labelled scatter and a linear fit per group for the plot below.
local plots
foreach grp in 1 0 {
    correlate sum_total1 ppop_average if old == `grp'
    local plots `plots' (scatter sum_total1 ppop_average if old==`grp', mlabel(iso))
    local plots `plots' (lfit sum_total1 ppop_average if old==`grp')
}
twoway `plots'
 

** 3. Exclude manslaughter from aggregation
* Overall index from the five remaining sub-indices.
generate sum_nomans = sum_rob + sum_rape + sum_fraud + sum_drugp + sum_drugd

* Agreement with the baseline index (value noted in the original script: 0.92)
correlate sum_total sum_nomans
* Side-by-side bars per country, ordered by the second variable (sum_nomans), labels rotated 45 degrees.
graph bar (mean) sum_total sum_nomans, over(country, sort(2) label(angle(45)))
summarize sum_nomans, detail

* Correlation with prison population, full sample (value noted in the original script: 0.08)
correlate sum_nomans ppop_average

* By membership group; values noted in the original script: 0.38 for old==1, -0.48 for old==0.
* The same pass collects a labelled scatter and a linear fit per group for the plot below.
local plots
foreach grp in 1 0 {
    correlate sum_nomans ppop_average if old == `grp'
    local plots `plots' (scatter sum_nomans ppop_average if old==`grp', mlabel(iso))
    local plots `plots' (lfit sum_nomans ppop_average if old==`grp')
}
twoway `plots'


** 4. Index of robbery alone
* Agreement between the robbery sub-index and the overall index (value noted in the original script: 0.73)
correlate sum_total sum_rob
* Side-by-side bars per country, ordered by the second variable (sum_rob), labels rotated 45 degrees.
graph bar (mean) sum_total sum_rob, over(country, sort(2) label(angle(45)))
summarize sum_rob, detail

* Correlation with prison population, full sample (value noted in the original script: -0.13)
correlate sum_rob ppop_average

* By membership group; values noted in the original script: -0.0049 for old==1, -0.6 for old==0.
* The same pass collects a labelled scatter and a linear fit per group for the plot below.
local plots
foreach grp in 1 0 {
    correlate sum_rob ppop_average if old == `grp'
    local plots `plots' (scatter sum_rob ppop_average if old==`grp', mlabel(iso))
    local plots `plots' (lfit sum_rob ppop_average if old==`grp')
}
twoway `plots'


** 5. Distinction between violent and non-violent crime
* Violent index: robbery, rape and manslaughter. Non-violent index: fraud and both drug offenses.
generate sum_total_violent = sum_rob + sum_rape + sum_mans
generate sum_total_nonviolent = sum_fraud + sum_drugp + sum_drugd

* Correlation with prison population for each index: full sample, then old==1, then old==0.
* Values noted in the original script:
*   non-violent:  0.2  (all),  0.47 (old==1), -0.28 (old==0)
*   violent:     -0.04 (all),  0.38 (old==1), -0.66 (old==0)
foreach idx in sum_total_nonviolent sum_total_violent {
    correlate `idx' ppop_average
    foreach grp in 1 0 {
        correlate `idx' ppop_average if old == `grp'
    }
}
